{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Run Harmonics on STARmap V1C dataset\n", "Need additional packages: scanpy seaborn networkx" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load the packages" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%reload_ext autoreload\n", "%autoreload 2\n", "\n", "import os\n", "import time\n", "import scanpy as sc\n", "import pandas as pd\n", "import numpy as np\n", "import anndata as ad\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "\n", "from Harmonics import *\n", "\n", "import warnings\n", "warnings.filterwarnings(\"ignore\")\n", "\n", "sc.settings.verbosity = 0\n", "sc.settings.set_figure_params(dpi=50, dpi_save=500)\n", "\n", "from matplotlib import rcParams\n", "rcParams[\"figure.dpi\"] = 50\n", "rcParams[\"savefig.dpi\"] = 500\n", "rcParams['pdf.fonttype'] = 42\n", "rcParams['svg.fonttype'] = 'none'\n", "rcParams['ps.fonttype'] = 42\n", "# rcParams['font.family'] = 'Arial'\n", "rcParams['savefig.transparent'] = True" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data_dir = '../../../Data/Spatial/Transcriptomics/STARmap_V1_Wang2018/'\n", "save_dir = f'../../results/STARmap_V1_Wang2018/Harmonics/'\n", "fig_dir = save_dir + 'figs/'\n", "if not os.path.exists(fig_dir):\n", " os.makedirs(fig_dir)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Define a function to match the niche label to ground truth annotation and a function to change p values to corresponding star representation, used to show the results of additional tests implemented in Harmonics." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import networkx as nx\n", "\n", "def match_cluster_labels(true_labels, est_labels):\n", " true_labels_arr = np.array(list(true_labels))\n", " est_labels_arr = np.array(list(est_labels))\n", "\n", " org_cat = list(np.sort(list(pd.unique(true_labels))))\n", " est_cat = list(np.sort(list(pd.unique(est_labels))))\n", "\n", " B = nx.Graph()\n", " B.add_nodes_from([i + 1 for i in range(len(org_cat))], bipartite=0)\n", " B.add_nodes_from([-j - 1 for j in range(len(est_cat))], bipartite=1)\n", "\n", " for i in range(len(org_cat)):\n", " for j in range(len(est_cat)):\n", " weight = np.sum((true_labels_arr == org_cat[i]) * (est_labels_arr == est_cat[j]))\n", " B.add_edge(i + 1, -j - 1, weight=-weight)\n", "\n", " match = nx.algorithms.bipartite.matching.minimum_weight_full_matching(B)\n", "\n", " if len(org_cat) >= len(est_cat):\n", " return np.array([match[-est_cat.index(c) - 1] - 1 for c in est_labels_arr])\n", " else:\n", " unmatched = [c for c in est_cat if not (-est_cat.index(c) - 1) in match.keys()]\n", " l = []\n", " for c in est_labels_arr:\n", " if (-est_cat.index(c) - 1) in match:\n", " l.append(match[-est_cat.index(c) - 1] - 1)\n", " else:\n", " l.append(len(org_cat) + unmatched.index(c))\n", " return np.array(l)\n", " \n", " \n", "def p2stars(p):\n", " if p < 0.001:\n", " return '***'\n", " elif p < 0.01:\n", " return '**'\n", " elif p < 0.05:\n", " return '*'\n", " else:\n", " return ''" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load dataset" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "AnnData object with n_obs × n_vars = 1207 × 1020\n", " obs: 'clusterid', 'celltype', 'layer'\n", " obsm: 'spatial'" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "slice_name_list = ['V1']\n", "adata = ad.read_h5ad(data_dir + 'processed/STARmap_V1Cortex.h5ad')\n", "adata" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "celltype\n", "eL4 189\n", "eL2/3 176\n", "eL6-2 156\n", "Oligo 154\n", "Astro-2 105\n", "Endo 86\n", "eL6-1 80\n", "eL5 69\n", "Micro 52\n", "PVALB 31\n", "Reln 27\n", "Astro-1 26\n", "SST 23\n", "VIP 13\n", "HPC 10\n", "Smc 10\n", "Name: count, dtype: int64" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "adata.obs['celltype'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Run Harmonics" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Instantiate Harmonics" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Dataset comprises 1 slices, 1207 cells/spots in total.\n" ] } ], "source": [ "iter=0\n", "model = Harmonics_Model(adata, \n", " slice_name_list,\n", " cond_list=None, # default\n", " cond_name_list=None, # default\n", " concat_label='slice_name', # default\n", " proportion_label=None, # default\n", " seed=1234+iter, # default\n", " parallel=False, # recommand to set to True for large dataset and False for small dataset\n", " verbose=True, # default\n", " )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Preprocess the data (Generating the connection graph and calculating neighborhood cell type destribution for cells)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Generating Delaunay neighbor graph...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 1/1 [00:00<00:00, 82.56it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "All done!\n", "\n", "Performing graph completion...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 1/1 [00:00<00:00, 6.91it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "All done!\n", "\n", "The cell types of interest are:\n", "Astro-1\n", "Astro-2\n", "Endo\n", "HPC\n", "Micro\n", "Oligo\n", "PVALB\n", "Reln\n", "SST\n", "Smc\n", "VIP\n", "eL2/3\n", "eL4\n", "eL5\n", "eL6-1\n", "eL6-2\n", "\n", "Generating one-hot matrix...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 0%| | 0/1 [00:00" ] }, "metadata": { "image/png": { "height": 189, "width": 338 } }, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Done!\n", "\n" ] } ], "source": [ "adata_list, adata_concat = model.select_solution(n_niche=None, # default \n", " niche_key=f'niche_label', # default \n", " auto=True, # default \n", " metric='jsd_v2', # default\n", " threshold=0.1, # default\n", " return_adata=True, # default\n", " plot=True, # default\n", " save=False, # default\n", " fig_size=(7, 4), \n", " save_dir=save_dir, \n", " file_name=f'score_vs_nichecount_basic_{iter}.pdf',\n", " )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Save the result" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "adata_concat.write_h5ad(save_dir + f'Harmonics_result_{iter}.h5ad')" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# adata = ad.read_h5ad(save_dir + f'Harmonics_result_0.h5ad')\n", "# adata" ] } ], "metadata": { "kernelspec": { "display_name": "CNI", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.0" } }, "nbformat": 4, "nbformat_minor": 2 }